
*** Candidate locations of the replication package, one local per machine.
*** dirBZ and dirEF are left empty and are not used below.
local dirAF ""~/Dropbox/Covid Info Survey/replication_package/""
local dirBZ ""
local dirEF ""
local dirVP ""C:/Users/pezone/AppData/Local/Dropbox/Dropbox/Covid Info Survey/replication_package/""
local dirVPmac ""/Users/vincenzopezone/Dropbox/Covid Info Survey/replication_package/""

*** Try each known location in turn. Errors are silenced by capture, so the
*** last cd that succeeds determines the working directory.
foreach machine in dirVPmac dirAF dirVP {
	capture cd ``machine''
}

*** w holds the digits of the winsorization fraction: the winsor calls below use p(.w), i.e. p(.025)
local w 025

/*** Import Excel data (Wave 1) ***/

*** Load wave 1 of the survey; the first row of the sheet supplies the variable names
import excel "survey_1.xlsx", sheet("Sheet0") firstrow clear

*** Keep only responses whose gc field is "1" or "4"
keep if inlist(gc, "1", "4")

/*** Screeners ***/
label var Q169 "region"
label var Q25 "age"
label var Q26 "gender"
label var Q185 "interested in sports"
label var Q186 "interested in music"

/*** Priors ***/
label var Q0 "infected by covid"
label var Q1_1 "worried health"
label var Q2_1 "worried economy"
label var Q3_4 "probability infected (prior)"
label var Q4 "nowcast deaths"
label var Q5_1 "confident nowcast deaths"
label var Q6 "forecast deaths (priors)"
label var Q7_1 "probability deaths>200,000 (prior)"
label var Q8 "nowcast UE"
label var Q9_1 "confident nowcast UE"
label var Q10 "forecast UE (priors)"
label var Q11_1 "probability UE>20 (prior)"
label var Q12_1 "measures evaluation"

/*** Treatment ***/
label var Q13 "health vs economy"
label var Q14 "economy headlines"
label var Q14_DO "economy headlines - order"
label var Q15 "health headlines"
label var Q15_DO "health headlines - order"
label var Q16 "headline just read"
label var Q17_1 "article informativeness"
label var Q18_1 "article reliability"

*** Q19_1 to Q19_9: probability attached to each news outlet
label var Q19_1 "Probability Breitbart"
label var Q19_2 "Probability CNN"
label var Q19_3 "Probability Fox News"
label var Q19_4 "Probability MSNBC"
label var Q19_5 "Probability New York Times"
label var Q19_6 "Probability USA Today"
label var Q19_7 "Probability WSJ"
label var Q19_8 "Probability WP"
label var Q19_9 "Probability Other news sources"

*** Probability mass on the outlets grouped as liberal (MSNBC, NYT, WP, CNN)
*** and as conservative (Breitbart, Fox News, WSJ), and the gap between the two
generate p_liberal = Q19_4 + Q19_5 + Q19_8 + Q19_2
generate p_conservative = Q19_1 + Q19_3 + Q19_7
generate p_lib_cons = p_liberal - p_conservative


label variable Q21_choice1_1 "spent advertising news source"

/*** Posteriors ***/
label variable Q22 "forecast deaths (posterior)"
label variable Q23_1 "probability deaths>200,000 (posterior)"
label variable Q24 "forecast UE (posterior)"
label variable Q25_1 "probability UE>20 (posterior)"
label variable Q26_4 "probability infected (posterior)"
label variable Q27 "What should politicians do?"

/*** Demographics ***/
label variable Q29 "degree"
label variable Q29_8_TEXT "degree (other)"
label variable Q30 "employment situation"
label variable Q30_10_TEXT "employment situation (other)"
label variable Q31 "loss of employment?"
label variable Q32 "hispanic"
label variable Q33 "race"
label variable Q33_6_TEXT "race (other)"
label variable Q34 "health"
label variable Q35 "conditions"
label variable Q36 "health insurance"
label variable Q37 "political views"
label variable Q38 "who vote for president"
label variable Q39 "where do you get info about Covid"
* NOTE(review): the original labelled Q39 a second time with
* "where do you get info about Covid (other)", which overwrote the label above.
* That label presumably belongs to the free-text companion of Q39, whose name
* is not visible in this file - TODO add the label on the correct variable.
label variable Q40 "live with a partner"
label variable Q41 "ZIP code"
label variable Q42 "own residence"
label variable Q43_1 "people below 6"
label variable Q43_2 "people age 7-18"
label variable Q43_3 "people age 19-65"
label variable Q43_4 "people above 65"
label variable Q44 "actions where you live"
label variable Q44_6_TEXT "actions where you live (other)"
label variable Q45 "hold stock or 401k"
label variable Q46_1 "prepared to take risks"
label variable Q47_1 "willing to give up today for tomorrow"
label variable Q181_1 "can people be trusted?"
label variable Q48_1 "income"
label variable Q212_1 "income (fixed drill down)"
label variable Q50_choice2_1 "money to WHO"
label variable Q50_choice2_2 "money to FreedomWorks"
label variable Q51 "how interesting the survey"
label variable Q52 "comments on the survey"

*** Article assignment fields, one per preferred-headline branch
label variable FL_130_DO "assigned article - optimistic economy preferred"
label variable FL_144_DO "assigned article - pessimistic economy preferred"
label variable FL_139_DO "assigned article - pessimistic health preferred"
label variable FL_148_DO "assigned article - optimistic health preferred"

/*** Generate date and time variables ***/

*** Split the StartDate timestamp into a daily date and its clock components
generate date = dofc(StartDate)
generate hour = hh(StartDate)
generate minute = mm(StartDate)
generate second = ss(StartDate)
format %td date

*** Drop pilot observations (responses dated before 15 May 2020)
drop if date < td(15may2020)

*** Drop "wrong" observations: answers above 100 to Q8, Q10 or Q24
*** NOTE(review): Stata treats missing values as larger than any number, so
*** observations with a missing answer are dropped here as well - confirm intended
local X 8 10 24
foreach x of local X {
	drop if Q`x' > 100
}


*** Check quotas

*** Geographic location

tabulate Q169
/*** 

     region |      Freq.     Percent        Cum.
------------+-----------------------------------
    Midwest |        425       21.44       21.44
  Northeast |        350       17.66       39.10
      South |        745       37.59       76.69
       West |        462       23.31      100.00

Theoretical frequencies should be: 21.33%, 18.02%, 37.27%, 23.38%

***/

*** Gender

tabulate Q26

/*** 


     gender |      Freq.     Percent        Cum.
------------+-----------------------------------
     Female |      1,020       51.46       51.46
       Male |        962       48.54      100.00
------------+-----------------------------------


Theoretical frequencies should be: 51%, 49%.

***/

*** Age


* NOTE(review): no variable named age is created in the visible part of this
* script (Q25 only carries the label "age"); presumably age comes with the
* imported sheet - confirm
tab age

/***


        age |      Freq.     Percent        Cum.
------------+-----------------------------------
      18-24 |        253       12.76       12.76
      25-34 |        351       17.71       30.47
      35-44 |        328       16.55       47.02
      45-54 |        356       17.96       64.98
      55-64 |        324       16.35       81.33
        65+ |        370       18.67      100.00
------------+-----------------------------------

Theoretical frequencies should be 12.8%, 17.7%, 16.7%, 17.7%, 16.4%, 18.8%.

***/

*** Check survey duration

*** Duration divided by 60 (presumably seconds to minutes - confirm unit), with detailed percentiles
generate duration_minutes = Duration / 60
summarize duration_minutes, detail

/***


-------------------------------------------------------------
      Percentiles      Smallest
 1%          6.2              6
 5%     6.866667              6
10%          7.8       6.033333       Obs               1,982
25%     9.733334       6.033333       Sum of Wgt.       1,982

50%        13.35                      Mean           18.00338
                        Largest       Std. Dev.      27.27214
75%        18.45       370.0667
90%     26.56667            392       Variance       743.7696
95%     36.03333       451.7667       Skewness       12.75611
99%        94.35         643.95       Kurtosis       222.6442


***/

*** Attention screeners

tabulate Q185

/***


    interested in sports |      Freq.     Percent        Cum.
-------------------------+-----------------------------------
Very strongly interested |      1,982      100.00      100.00
-------------------------+-----------------------------------

***/

tabulate Q186

/***


  interested in music |      Freq.     Percent        Cum.
----------------------+-----------------------------------
Not at all interested |      1,982      100.00      100.00
----------------------+-----------------------------------

***/

*** Headline chosen

*** Domain picked by the respondent: H when the Q13 answer mentions "health",
*** overwritten with E when it mentions "economic"
generate domain_chosen = "H" if strpos(Q13, "health") > 0
replace domain_chosen = "E" if strpos(Q13, "economic") > 0

tabulate domain_chosen

*** Headline check

*** Concatenate the four assignment fields into a single article code
generate article_assigned = FL_130_DO + FL_144_DO + FL_139_DO + FL_148_DO

*** Map the headline the respondent reports having read (Q16) to an article code
generate headline_check = "PH" if Q16 == "Two influential forecasting models predict sharp rise in coronavirus deaths"
replace headline_check = "PE" if Q16 == "Highest unemployment rate since the Depression era, and many jobs may not come back"
replace headline_check = "OE" if Q16 == "Hope for swift economic recovery builds as businesses reopen"
replace headline_check = "OH" if Q16 == "New data suggest the coronavirus is less deadly than we thought"

*** 1 when the reported headline matches the assigned article, 0 otherwise
generate fraction_ok = headline_check == article_assigned

summarize fraction_ok

/***

    Variable |        Obs        Mean    Std. Dev.       Min        Max
-------------+---------------------------------------------------------
 fraction_ok |      1,982    .7951564    .4036891          0          1


***/



*** Check randomization

tabulate article_assigned if strpos(Q14, "Highest unemployment rate") > 0 & domain_chosen == "E"

/***

article_ass |
      igned |      Freq.     Percent        Cum.
------------+-----------------------------------
         OE |        138       50.92       50.92
         PE |        133       49.08      100.00
------------+-----------------------------------
      Total |        271      100.00

***/

tabulate article_assigned if strpos(Q14, "swift economic recovery") > 0 & domain_chosen == "E"

/***

article_ass |
      igned |      Freq.     Percent        Cum.
------------+-----------------------------------
         OE |        220       50.11       50.11
         PE |        219       49.89      100.00
------------+-----------------------------------
      Total |        439      100.00

***/

tabulate article_assigned if strpos(Q15, "less deadly") > 0 & domain_chosen == "H"

/***

article_ass |
      igned |      Freq.     Percent        Cum.
------------+-----------------------------------
         OH |        295       49.83       49.83
         PH |        297       50.17      100.00
------------+-----------------------------------
      Total |        592      100.00


	  
***/

tabulate article_assigned if strpos(Q15, "forecasting models") > 0 & domain_chosen == "H"

/***

article_ass |
      igned |      Freq.     Percent        Cum.
------------+-----------------------------------
         OH |        350       51.47       51.47
         PH |        330       48.53      100.00
------------+-----------------------------------
      Total |        680      100.00

***/


*** Check time


*** Collapse the four article-specific page timers into one variable.
*** Start from the OE timer, then let each non-missing timer overwrite it in the order PE, PH, OH.
generate time_spent_article = time_spent_OE_PageSub if time_spent_OE_PageSub != .
foreach art in PE PH OH {
	replace time_spent_article = time_spent_`art'_PageSub if time_spent_`art'_PageSub != .
}

generate time_spent_article_minutes = time_spent_article / 60

summarize time_spent_article_minutes, detail


/***


                 time_spent_article_minutes
-------------------------------------------------------------
      Percentiles      Smallest
 1%     .0332333       .0182833
 5%        .0521          .0219
10%        .0718          .0222       Obs               1,982
25%     .3570167       .0233333       Sum of Wgt.       1,982

50%     1.289975                      Mean            1.85266
                        Largest       Std. Dev.      5.111324
75%      2.20315        46.3091
90%       3.3397       54.21113       Variance       26.12563
95%      4.51875       90.36495       Skewness       24.36845
99%     11.52618       177.8033       Kurtosis        764.119

***/

bysort article_assigned: summarize time_spent_article_minutes, detail

/***

-> article_assigned = OE

                 time_spent_article_minutes
-------------------------------------------------------------
      Percentiles      Smallest
 1%       .03465         .02835
 5%        .0574            .03
10%     .0780167       .0309167       Obs                 358
25%       .49055         .03465       Sum of Wgt.         358

50%       1.2452                      Mean            1.80404
                        Largest       Std. Dev.      4.952539
75%      2.13975       8.214633
90%     3.244433          8.238       Variance       24.52765
95%      4.10205        14.1658       Skewness       16.11351
99%     8.214633       90.36495       Kurtosis       287.3689


-> article_assigned = OH

                 time_spent_article_minutes
-------------------------------------------------------------
      Percentiles      Smallest
 1%     .0321333       .0182833
 5%     .0473667         .02365
10%     .0650167       .0254833       Obs                 645
25%        .2733       .0283333       Sum of Wgt.         645

50%     1.298667                      Mean           1.771505
                        Largest       Std. Dev.      3.221159
75%     2.226533       11.60748
90%        3.246       11.66183       Variance       10.37586
95%      4.51875        46.3091       Skewness       11.17392
99%     10.69185       54.21113       Kurtosis       167.0911

-> article_assigned = PE

                 time_spent_article_minutes
-------------------------------------------------------------
      Percentiles      Smallest
 1%         .041       .0233333
 5%       .06125         .03335
10%       .09875       .0348833       Obs                 352
25%        .5187           .041       Sum of Wgt.         352

50%       1.3739                      Mean           1.765805
                        Largest       Std. Dev.      2.132678
75%     2.324042       10.99425
90%     3.490917        15.0003       Variance       4.548317
95%     4.568583       15.37982       Skewness       4.106432
99%     10.99425       20.13755       Kurtosis       28.03465

-> article_assigned = PH

                 time_spent_article_minutes
-------------------------------------------------------------
      Percentiles      Smallest
 1%     .0330833          .0219
 5%        .0508          .0222
10%     .0682333         .02795       Obs                 627
25%     .2848167       .0285667       Sum of Wgt.         627

50%     1.285733                      Mean           2.012665
                        Largest       Std. Dev.      7.444812
75%       2.1438       15.93765
90%     3.412017       18.52612       Variance       55.42523
95%       4.8477       32.35686       Skewness       21.22924
99%     13.97793       177.8033       Kurtosis       497.9515

***/

*** Generate sample restrictions based on time spent on first questions


*** Total time on the first pages: sum of the page timers time1 to time9
generate time_before = 0

forvalues page = 1/9 {
	replace time_before = time_before + time`page'_PageSubmit
}

*** Time spent on question Q12 was labeled Q153; we add that as well
replace time_before = time_before + Q153_PageSubmit

generate time_before_minutes = time_before / 60

*** For each cutoff we generate two sample flags: one that excludes only the
*** observations at the bottom, and one that also excludes the observations at
*** the top, using the symmetric percentile.
*** NOTE(review): a missing time_before_minutes compares as larger than the
*** lower cutoff, so such observations get flag 1 in the bottom-only samples - confirm intended

local X 5 7.5 10

foreach x of local X {

	* Upper percentile mirrors the lower one; dots become underscores so the
	* values can be used inside variable names
	local y = 100 - `x'
	local z = subinstr("`x'", ".", "_", .)
	local q = subinstr("`y'", ".", "_", .)

	egen lo_cut = pctile(time_before_minutes), p(`x')
	egen hi_cut = pctile(time_before_minutes), p(`y')

	generate sample_`z' = time_before_minutes > lo_cut
	generate sample_`z'_`q' = time_before_minutes > lo_cut & time_before_minutes < hi_cut

	drop lo_cut hi_cut

	* S accumulates the suffixes of all sample flags created so far
	local S `S' `z' `z'_`q'
	display "`S'"

}

*** Note: sample_X equals 1 for observations strictly above the Xth percentile of time_before_minutes (i.e. it excludes the bottom X percent);
*** sample_X_Y additionally requires the observation to lie strictly below the Yth percentile.



*************************************************************
******* Generate variables used in regressions **************
*************************************************************

*** Main variables of interest

*** Infection status: encoded categorical version of Q0, plus a 0/1 flag for
*** the two answers indicating a confirmed or very likely infection
encode Q0, gen(infected)
label variable infected "Already Infected (Cntd)"

gen already_infected = inlist(Q0, "Yes, definitely - confirmed by test", "Very likely - did not get tested, but had symptoms")
label variable already_infected "Already Infected"

gen worried_health = Q1_1
label variable worried_health "Worried - Health"

gen worried_economy = Q2_1
label variable worried_economy "Worried - Economy"

gen probability_infected = Q3_4
label variable probability_infected "Prob. Infected by Year End"

gen nowcast_deaths = Q4/1000
label variable nowcast_deaths "Nowcast Deaths (in 1,000s)"

*** Nowcast deaths takes implausible values; for now, we winsorize it.
*** The full variable name is spelled out: the original relied on the
*** abbreviation nowcast_death, which fails when variable abbreviation is off.
winsor nowcast_deaths, p(.`w') gen(nowcast_deaths2)
label variable nowcast_deaths2 "Nowcast Deaths"

gen lnowcast_deaths = log(Q4)
label variable lnowcast_deaths "Log(Nowcast Deaths)"

*** Confidence answers are strings: real() converts those that are plain
*** numbers, and answers containing 1, 5 or 3 are then set explicitly
*** (presumably these scale points carry a text anchor - confirm).
gen confident_nowcast_deaths = real(Q5_1)
replace confident_nowcast_deaths = 1 if strpos(Q5_1, "1") > 0
replace confident_nowcast_deaths = 5 if strpos(Q5_1, "5") > 0
replace confident_nowcast_deaths = 3 if strpos(Q5_1, "3") > 0
label variable confident_nowcast_deaths "Confident in Nowcast Deaths"

gen prob_deaths_200 = Q7_1
label variable prob_deaths_200 "Pr. Deaths\textgreater 200K"

gen nowcast_ue = Q8
label variable nowcast_ue "Nowcast UE"

gen confident_nowcast_ue = real(Q9_1)
replace confident_nowcast_ue = 1 if strpos(Q9_1, "1") > 0
replace confident_nowcast_ue = 5 if strpos(Q9_1, "5") > 0
replace confident_nowcast_ue = 3 if strpos(Q9_1, "3") > 0
label variable confident_nowcast_ue "Confident in Nowcast UE"

gen forecast_ue_prior = Q10
label variable forecast_ue_prior "Forecast UE (Prior)"

*** The original referred to Q11, an abbreviation of the labelled variable
*** Q11_1; the full name is used so the line does not depend on abbreviation.
gen prob_ue_20 = Q11_1
label variable prob_ue_20 "Pr. UE\textgreater 20"

*** Choice indicators scaled to 0/100

gen health_domain = (strpos(Q13, "health") > 0)*100
label variable health_domain "Health Dom."

gen pessimistic_economy = (strpos(Q14, "Highest unemployment") > 0)*100
label variable pessimistic_economy "Pessimistic Headline - Economy"

gen pessimistic_health = (strpos(Q15, "Two influential") > 0)*100
label variable pessimistic_health "Pessimistic Headline - Health"


**************** Demographics ************************

gen female = (Q26 == "Female")
label variable female "Female"

encode Q29, gen(degree)
label variable degree "School Degree"

encode Q30, gen(employment)
label variable employment "Employment Situation"

gen employment_loss_hh = (Q31 == "Yes")
label variable employment_loss_hh "Empl. Loss in HH"

*** Skip race (Q33) for now, as it has multiple answers possible

encode Q34, gen(health)
label variable health "Health"

*** Number of health conditions, inferred from the length of the Q35 string:
*** 0 when empty, 1 for lengths 28, 52 or 97, 3 for length 179, 2 otherwise.
*** NOTE(review): presumably these are the lengths of the single answers and of
*** all three answers combined; this breaks if the answer wording changes - confirm
g Q35x = length(Q35)

g health_conditions = 0 if Q35x == 0
replace health_conditions = 1 if inlist(Q35x, 28, 52, 97)
replace health_conditions = 3 if Q35x == 179
replace health_conditions = 2 if health_conditions == .

label variable health_conditions "Health Cnds. (0-3)" // AF changed label

gen health_insurance = (Q36 == "Yes")
label variable health_insurance "Health Insurance"

encode Q37, gen(political_views)
label variable political_views "Political Views"

encode Q38, gen(vote_for)
label variable vote_for "Planning to vote for..."

*** Skip news sources on Covid (Q39) as it has multiple answers possible

gen married = (Q40 == "Yes")
label variable married "Married"

gen zip = real(Q41)

gen own_house = (Q42 == "Yes")
label variable own_house "Own Primary Residence"

*** # people in HH (ignore age structure for now)
gen n_hh = real(Q43_1) + real(Q43_2) + real(Q43_3) + real(Q43_4)
label variable n_hh "People in HH"

*** NOTE(review): a missing n_old also satisfies n_old>2 and is therefore set to 2 - confirm intended
gen n_old = real(Q43_4)
replace n_old = 2 if n_old > 2
label variable n_old "Nr Ppl. above 65 in HH" // AF changed label

encode Q44, gen(actions_taken)
label variable actions_taken "Actions Taken So Far"

encode Q45, gen(stock)
label variable stock "Hold Stocks"

*** Scale questions stored as strings: real() converts answers that are plain
*** numbers, and answers containing 1 or 7 are then set explicitly
gen risk_loving = real(Q46_1)
replace risk_loving = 1 if strpos(Q46_1, "1") > 0
replace risk_loving = 7 if strpos(Q46_1, "7") > 0
label variable risk_loving "Willingness to Take Risks"

gen discount = real(Q47_1)
replace discount = 1 if strpos(Q47_1, "1") > 0
replace discount = 7 if strpos(Q47_1, "7") > 0
label variable discount "Willingness to Wait"

*** The original referred to Q181, an abbreviation of the labelled variable
*** Q181_1; the full name is used so the lines do not depend on abbreviation.
gen trust = real(Q181_1)
replace trust = 1 if strpos(Q181_1, "1") > 0
replace trust = 7 if strpos(Q181_1, "7") > 0
label variable trust "Trust in People"

*** At the beginning of the survey we had a formatting mistake in the income question.
*** We managed to fix it soon after the survey had started, but some participants got the wrong screen
*** Here we fix the answer. We assume that people who got the wrong formatting ignored it or figured out (which seems reasonable)

*** Combine the two income questions (string concatenation) and repair the
*** three brackets whose upper bound was shown incorrectly
generate income_fixed = Q48_1 + Q212_1
replace income_fixed = "$30,000 - $39,999" if income_fixed == "$30,000 - $29,999"
replace income_fixed = "$40,000 - $49,999" if income_fixed == "$40,000 - $29,999"
replace income_fixed = "$50,000 - $59,999" if income_fixed == "$50,000 - $29,999"
encode income_fixed, gen(income)
label variable income "HH Income"

encode Q169, gen(region)
label variable region "Region"

*** Age, capped at its 99th percentile and then divided by 100
*** NOTE(review): a missing age also satisfies the comparison below and is set to the cap - confirm intended
generate age_c = Q25
quietly summarize age_c, detail
replace age_c = `r(p99)' if age_c > `r(p99)'
replace age_c = age_c / 100
label variable age_c "Age (\(\div 100\))"

*** Capped age back on its original scale
generate age_100 = age_c * 100
label variable age_100 "Age (18-81)"

*** Sum and difference of the two worry measures
generate worried = worried_economy + worried_health
label variable worried "Worried H+E"

generate worried_h_e = worried_health - worried_economy
label variable worried_h_e "Worried H-E"

*** 1 when any health condition was reported (Q35 not empty)
generate any_cond = Q35 != "" // could refine this -- e.g. 1 cond, 2 cond, 3 cond
label variable any_cond "Health Cond."

*** Household composition: convert the counts to numeric and cap them
*** NOTE(review): a missing n_hh also satisfies n_hh>7 and is set to 7 - confirm intended
destring Q43_*, replace // nr people
replace Q43_4 = 2 if Q43_4 > 2 // "2 or more" people above 65

replace n_hh = 7 if n_hh > 7

// Articles (plain comment marker: the original used the line-join marker here)

// Code of the article actually shown, built by concatenating the four assignment fields
g x = FL_130_DO + FL_139_DO + FL_144_DO + FL_148_DO
encode x, gen(article_seen)

// pref_seen = 1 when the article shown matches the headline the respondent
// preferred within the chosen domain. The full name pessimistic_economy is
// used: the original relied on the abbreviation pessimistic_econ.
g pref_seen = (health_domain == 100 & ((pessimistic_health == 0 & x == "OH") | (pessimistic_health == 100 & x == "PH"))) | ///
	(health_domain == 0 & ((pessimistic_economy == 0 & x == "OE") | (pessimistic_economy == 100 & x == "PE")))

gen headline_ok = (headline_check == x)
label variable headline_ok "Headline Check"

// Informativeness: strip the characters of the text anchors and convert to numeric
tab Q17_1
destring Q17_1, ignore("Not informative at all Very") replace
// Reliability: same treatment
tab Q18_1
destring Q18_1, ignore("Not reliable at all Completely") replace

// One dummy per article code found in x, plus its interaction with pref_seen.
// The labels below assume the codes are OH, PH, OE and PE.
levelsof x, local(X)

foreach art in `X' {

	gen `art' = (x == "`art'")

	gen `art'_pref = `art'*pref_seen

}

label variable OH "Opt. Health"
label variable PH "Pess. Health"
label variable OE "Opt. Econ."
label variable PE "Pess. Econ."

label variable OH_pref "Opt. Health $\times$ Pref."
label variable PH_pref "Pess. Health $\times$ Pref."
label variable OE_pref "Opt. Econ. $\times$ Pref."
label variable PE_pref "Pess. Econ. $\times$ Pref."

label variable pref_seen "Preferred"

// -----------------------------------------------
// Revisions (probabilistic): posterior minus prior

// Deaths: compare posterior and prior, then build the revision and the
// flags for priors of exactly 100 and exactly 0
compare Q23_1 Q7_1
generate p_death_rev = Q23_1 - Q7_1
generate Q7_1_100 = Q7_1 == 100
generate Q7_1_0 = Q7_1 == 0

// Same revision stored under a second name
generate p_deaths_rev = Q23_1 - Q7_1

// Unemployment: same construction
compare Q25_1 Q11_1
generate p_ue_rev = Q25_1 - Q11_1
generate Q11_1_100 = Q11_1 == 100
generate Q11_1_0 = Q11_1 == 0

*** Use names easier to remember

generate p_deaths_prior = Q7_1
label variable p_deaths_prior "Pr. Deaths. Prior"

generate p_deaths_posterior = Q23_1
label variable p_deaths_posterior "Pr. Deaths. Post."

generate p_ue_prior = Q11_1
label variable p_ue_prior "Pr. UE Prior"

generate p_ue_posterior = Q25_1
label variable p_ue_posterior "Pr. UE Post."


// lockdown support:

*** Support for a future lockdown: first character of Q27, converted to a number
generate support = substr(Q27, 1, 1)
destring support, replace
replace support = 6 - support // code such that higher = more support for future lockdown

*** Evaluation of the measures so far: first character of Q12_1, not reversed
generate support_sofar = substr(Q12_1, 1, 1)
destring support_sofar, replace
label variable support_sofar "Lockdown Support"

generate d_support = support - support_sofar
label variable d_support "Support Diff."

*** Deaths and UE Forecasts

generate for_ue_prior = Q10

*** Flags for prior probabilities of exactly 100 and exactly 0
generate prob_ue_20_100 = Q11_1 == 100
label variable prob_ue_20_100 "Prob. UE=100"

generate prob_ue_20_0 = Q11_1 == 0
label variable prob_ue_20_0 "Prob. UE=0"

generate prob_deaths_200_100 = Q7_1 == 100
label variable prob_deaths_200_100 "Prob. Deaths=100"

generate prob_deaths_200_0 = Q7_1 == 0
label variable prob_deaths_200_0 "Prob. Deaths=0"

*** Pessimistic article shown (either domain), and its interaction with pref_seen
generate pess = PH == 1 | PE == 1
label variable pess "Pessimistic"

generate pess_pref = pess * pref_seen
label variable pess_pref "Pess. \(\times\) Pref."

*** Headlines order: encode the display-order strings and shift the codes down by one

encode Q14_DO, gen(econ_order)
replace econ_order = econ_order - 1

encode Q15_DO, gen(health_order)
replace health_order = health_order - 1




*** For predictive regressions, reduce a bit the number of regressors

*** Education

gen bachelor = (Q29 == "Bachelor's Degree (For example: BA, BS)")

*** NOTE(review): the "Some college ..." string has no closing parenthesis; it
*** is kept exactly as in the original - confirm it matches the data
gen high_school = inlist(Q29, ///
	"High school diploma (or equivalent)", ///
	"Associate/Junior College Degree (including academic, vocational, or occupational programs)", ///
	"Some college but no degree (including academic, vocational, or occupational programs", ///
	"Less than high school")
replace high_school = 1 if inlist(Q29_8_TEXT, "Vocational School (certificate)", "Still student in college ")

gen more_than_college = inlist(Q29, "Doctoral Degree", "Master's Degree (For example: MA, MBA, MS, MSW)")
replace more_than_college = 1 if Q29_8_TEXT == "Law degree"

label variable high_school "At most high school"
label variable bachelor "Bachelor"
label variable more_than_college "More than bachelor"

*** Simplified Education

gen college = more_than_college + bachelor
label variable college "College"

*** Employment Conditions
*** Free-text answers (Q30_10_TEXT) are assigned to a category by hand.

gen retired = (Q30 == "Retiree or early retiree" | Q30_10_TEXT == "unemployed/retired")

*** NOTE(review): "prefer not to say" is assigned both to part_time here and to
*** not_work below, as in the original; it therefore counts towards both
*** working and not_working - confirm which one is intended
gen part_time = (Q30 == "Working part-time (for someone or self-employed)" ///
	| inlist(Q30_10_TEXT, "prefer not to say", "Partially furloughed"))

gen full_time = (Q30 == "Working full-time (for someone or self-employed)" ///
	| inlist(Q30_10_TEXT, "contract", "Self-employed", "Independent contractor"))

gen student = (Q30 == "Student, at school, or in training")
gen homemaker = (Q30 == "Homemaker")

*** The original set not_work in four statements that repeated the "NA" and
*** "unemployed" conditions; they are merged here without changing the result.
gen not_work = inlist(Q30, "Not working, but would like to work", "Temporarily laid off") ///
	| inlist(Q30_10_TEXT, "Furloughed from wdw", "NA", "unemployed", "Unemployed due to Covid 19", ///
	"Pimp", "prefer not to say", "umempoylment")

*** Full variable name used below: the original relied on the abbreviation not_work_not_l
gen not_work_not_looking = inlist(Q30, "On sick or other leave", "Permanently disabled or unable to work") ///
	| inlist(Q30_10_TEXT, "Unemployed and not looking for work ", "disabled")

*** Simplified employment conditions

gen working = homemaker + part_time + full_time

gen not_working = not_work + not_work_not_looking + student

label variable retired "Retired"
label variable part_time "Part Time"
label variable full_time "Full Time"
label variable not_work "Not Working"
label variable not_work_not_looking "Not Working Nor Looking"
label variable student "Student"

label variable not_working "Not Working"

label variable working "Working"

*** Political Affiliations

*** Liberal score from 6 (very liberal) down to 1 (very conservative), read off
*** the value label of the encoded political_views
generate liberal_score = .
replace liberal_score = 6 if political_views == "Very Liberal/Democrat":political_views
replace liberal_score = 5 if political_views == "Liberal/Democrat":political_views
replace liberal_score = 4 if political_views == "Leaning Liberal/Democrat":political_views
replace liberal_score = 3 if political_views == "Libertarian":political_views
replace liberal_score = 2 if political_views == "Conservative/Republican":political_views
replace liberal_score = 1 if political_views == "Very Conservative/Republican":political_views

label variable liberal_score "Liberal Score (1-6)"

*** Simplified Political Affiliations: three 0/1 groups

generate liberal = political_views == "Very Liberal/Democrat":political_views ///
	| political_views == "Liberal/Democrat":political_views ///
	| political_views == "Leaning Liberal/Democrat":political_views

generate conservative = political_views == "Conservative/Republican":political_views ///
	| political_views == "Very Conservative/Republican":political_views

generate libertarian = political_views == "Libertarian":political_views

label variable libertarian "Libertarian"
label variable conservative "Conservative"
label variable liberal "Liberal"

*** Race: From Andreas' code

// AF: generate indicators for Black, Asian, other minority, and then also Hispanic
*** Race indicators: Q33 allows several answers, so each flag tests whether the
*** answer string contains the relevant word
generate race_black = regexm(Q33, "Black")
generate race_asian = regexm(Q33, "Asian")
generate race_other_min = regexm(Q33, "Indian") | regexm(Q33, "Hawa")

* br Q33* if regexm(Q33,"specify") // those that selected "other" -- often hispanic

generate hispanic = Q32 == "Yes"

label variable race_black "Black"
label variable race_asian "Asian"
label variable race_other_min "Race - Other"
label variable hispanic "Hispanic"

*** Voting Plans: one 0/1 flag per answer to Q38

generate biden_voter = Q38 == "Joe Biden"
generate trump_voter = Q38 == "Donald Trump"
generate non_voter = Q38 == "I will not vote for anyone"
generate third_party_voter = Q38 == "Third party candidate"

label variable biden_voter "Vote Biden"
label variable trump_voter "Vote Trump"
label variable non_voter "Non Voter"
label variable third_party_voter "Third Party Voter"

*** Lockdown Severity

*** Severity from 4 (order currently in place) down to 1 (never an order);
*** 0 for "Don't know". Any free-text answer is coded 3 before the codes 2, 1
*** and 0 are applied, so the order of the statements matters.
generate severity_lockdown = .
replace severity_lockdown = 4 if actions_taken == "There is currently a stay-at-home order":actions_taken
replace severity_lockdown = 3 if actions_taken == "There was a stay-at-home order which is being gradually lifted (in phases)":actions_taken
replace severity_lockdown = 3 if Q44_6_TEXT != ""
replace severity_lockdown = 2 if actions_taken == "There was a stay-at-home order which has been lifted entirely":actions_taken
replace severity_lockdown = 1 if actions_taken == "There was never a stay-at-home order":actions_taken
replace severity_lockdown = 0 if actions_taken == "Don't know":actions_taken

generate lockdown_not_know = actions_taken == "Don't know":actions_taken
label variable lockdown_not_know "Lockdown Severity - Not Know"

label variable severity_lockdown "Lockdown Severity"

*** Generate continuous income variable: bracket index from 0 (lowest) to 12 (highest)

generate income_scale = 0 if income_fixed == "Less than $10,000"
replace income_scale = 1 if income_fixed == "$10,000 - $19,999"
replace income_scale = 2 if income_fixed == "$20,000 - $29,999"
replace income_scale = 3 if income_fixed == "$30,000 - $39,999"
replace income_scale = 4 if income_fixed == "$40,000 - $49,999"
replace income_scale = 5 if income_fixed == "$50,000 - $59,999"
replace income_scale = 6 if income_fixed == "$60,000 - $69,999"
replace income_scale = 7 if income_fixed == "$70,000 - $79,999"
replace income_scale = 8 if income_fixed == "$80,000 - $89,999"
replace income_scale = 9 if income_fixed == "$90,000 - $99,999"
replace income_scale = 10 if income_fixed == "$100,000 - $149,999"
replace income_scale = 11 if income_fixed == "$150,000 - $199,999"
replace income_scale = 12 if income_fixed == "$200,000 or more"

label variable income_scale "Income"

*** Shift the encoded stock variable down by one
replace stock = stock - 1

*** Evaluation of Article

generate reliability = Q18_1

generate informativeness = Q17_1

winsor time_spent_article_minutes, p(.`w') gen(time_article2)

generate money_spent = Q21_choice1_1



*** Build variables for revision regressions


*** Death forecasts in thousands; the _nw versions are not winsorized
generate deaths_posterior_nw = Q22 / 1000
winsor deaths_posterior_nw, p(.`w') gen(deaths_posterior)

*** The prior forecast is a string: commas are removed before conversion
generate deaths_prior_nw = real(subinstr(Q6, ",", "", .)) / 1000
winsor deaths_prior_nw, p(.`w') gen(deaths_prior)
label variable deaths_prior "Deaths Prior"

generate deaths_rev = deaths_posterior - deaths_prior

*** Log versions, and the log revision based on the winsorized series
generate log_deaths_prior_nw = log(deaths_prior_nw)
generate log_deaths_prior = log(deaths_prior)
generate log_deaths_posterior_nw = log(deaths_posterior_nw)
generate log_deaths_posterior = log(deaths_posterior)

generate log_deaths_rev = log_deaths_posterior - log_deaths_prior

*** Unemployment forecasts: prior, flags for priors of 100 and 0, posterior, revision
generate ue_prior = Q10
label variable ue_prior "UE Prior"

generate ue_100 = Q10 == 100
label variable ue_100 "UE Prior=100"

generate ue_0 = Q10 == 0
label variable ue_0 "UE Prior=0"

generate ue_posterior = Q24

generate ue_rev = ue_posterior - ue_prior

*** Probability of infection: revision, prior, posterior, flags for priors of 0 and 100
generate p_inf_rev = Q26_4 - Q3_4

generate p_inf_prior = Q3_4
label variable p_inf_prior "Pr. Infected"

generate p_inf_posterior = Q26_4

generate inf_0 = p_inf_prior == 0
generate inf_100 = p_inf_prior == 100

*** Donations

generate donation_who = Q50_choice2_1
label variable donation_who "Donation to WHO"

generate donation_fw = Q50_choice2_2
label variable donation_fw "Donation to FreedomWorks"

*** Gen dummy for full sample (to distinguish from those who missed headline check)

generate all = 1


*** Merge with county data

* Attach county identifiers to each respondent via the ZIP code.
* The ZIP variable is renamed to prop_zip, the merge key used with zipTOcounty.
rename zip prop_zip

* Many respondents per ZIP; keep master-only and matched observations
merge m:1 prop_zip using  zipTOcounty,keep(1 3)

* NOTE(review): this creates a numeric fips from a string with real(). It can
* only run if the using file's county-code variable is not itself named fips
* (presumably a longer name that fips abbreviates) - confirm against zipTOcounty
gen fips=real(fips)

* Manual county assignments for two ZIP codes
replace fips=45015 if prop_zip==29486

replace fips=48121 if prop_zip==75036

drop _merge


*** NYT has data for New York city, not its five counties, so we assign a fake FIPS code to its counties

replace fips=100000 if fips== 36005 | fips==36047  | fips== 36061  | fips==  36081 | fips== 36085 

* Build a county-by-date file of per-capita cases and deaths, then attach it to the survey data.
preserve

use infections_population_alldates, clear

* Rows labelled New York City get the same placeholder code used in the survey data;
* rows without a county code are dropped.
replace fips = 100000 if county == "New York City"
drop if fips == .

generate day = day(date)
generate month = month(date)
generate double cases_pc = cases / population

generate double deaths_pc = deaths / population

* For each month in M, store the per-capita deaths and cases observed on the latest
* day of that month present in the data, spread to every row of the county.
local M 2 3 4 5 6 

foreach m of local M {

	* Latest calendar day observed in month m (taken over all counties).
	quietly summarize day if month == `m'
	local last_day = r(max)

	generate temp_d = deaths_pc if day == `last_day' & month == `m'

	bysort fips: egen deaths_pc_`m' = mean(temp_d)

	generate temp_c = cases_pc if day == `last_day' & month == `m'

	bysort fips: egen cases_pc_`m' = mean(temp_c)

	drop temp_c temp_d

}

*** Shift dates forward by one day, so each survey date is matched to the previous day's figures
*** (respondents can only be aware of data already published)
replace date = date + 1


keep fips date deaths_pc* cases_pc*

save temp, replace

restore

* Keep all survey responses; county-date rows without a response are discarded.
merge m:1 fips date using temp, keep(1 3)

drop _merge

*** Unmatched responses are assumed to have zero deaths and infections; alternatively these obs. could be dropped.

* Flag them first (1 if unmatched, missing otherwise), before the zeros are filled in.
generate unmatched_county = 1 if deaths_pc == .

replace deaths_pc = 0 if deaths_pc == .
replace cases_pc = 0 if cases_pc == .

* Rescale both rates to per-1000 units; the labels are LaTeX strings.
replace deaths_pc = deaths_pc * 1000
label var deaths_pc "Deaths P.C. (\(\times\)1000)"

replace cases_pc = cases_pc * 1000
label var cases_pc "Cases P.C.  (\(\times\)1000)"



* Build a county-level unemployment file from the BLS spreadsheet (one row per county, April values).
preserve

import excel bls_ur.xlsx, sheet("laucntycur14") firstrow clear

* County code = state code followed by county code.
* NOTE(review): this relies on both columns being imported as strings - confirm.
generate fips_code = state_fips + county_fips

* The first six characters of period are split at the dash into period1 (month name) and period2 (two-digit year).
replace period = substr(period, 1, 6)

split period, p("-")

generate year = real("20" + period2)

* Translate the three-letter month name into its number; unrecognized names stay missing.
generate month = .
local month_no 0
foreach abbrev in Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec {

	local ++month_no
	replace month = `month_no' if period1 == "`abbrev'"

}

* The rate is read from the text column ur.
generate ue_county = real(ur)
label var ue_county "County UR"

generate ym = ym(year, month)

* Numeric panel identifier; rows without a county code are dropped.
egen id = group(fips_code)

drop if id == .

xtset id ym

* Lags and leads of the rate relative to April, 1 to 6 months (plus a 7-month lag below).
forvalues k = 1/6 {

	generate ue_l`k' = L`k'.ue_county if month == 4

	generate ue_f`k' = F`k'.ue_county if month == 4

}

generate ue_l7 = L7.ue_county if month == 4


keep if month == 4

* Change in the rate between April and six months earlier.
generate ue_diff = ue_county - ue_l6

keep fips_code ue_*

save ue_county, replace

restore

* Attach county unemployment; keep every survey response, matched or not.
merge m:1 fips_code using ue_county,keep(1 3)


*** Use national average if missing

* ue_diff has to be filled BEFORE ue_county: both conditions test ue_county, and once
* ue_county has been filled it is never missing, so the ue_diff fallback would never apply.
replace ue_diff=14.7-3.6 if ue_county==.

replace ue_county=14.7 if ue_county==.


*** Run a PCA of the belief variables separately for each sample; the local X lists the variables
*** that enter it (for now, only the "bounded" ones).
*** The first component is standardized for ease of interpretation.

label var probability_infected "Prob. Infected"

*** The UR variables are winsorized as well: the raw series is kept under the _nw suffix
*** and the winsorized one takes over the original name.

local X nowcast_ue ue_prior

foreach v of local X {

	rename `v' `v'_nw

	winsor `v'_nw, p(.`w') gen(`v')

}

local X  p_deaths_prior p_ue_prior deaths_prior ue_prior nowcast_deaths2 nowcast_ue probability_infected

* NOTE(review): the local S (list of sample suffixes) is not set in this part of the file;
* it is expected to have been defined earlier - confirm.
foreach s of local S {

	pca `X' if sample_`s' == 1

	* Scores of the first four components, for the observations in this sample only.
	predict pc_`s' pc2_`s' pc3_`s' pc4_`s' if sample_`s' == 1

	* Divide the first component by its standard deviation.
	quietly summarize pc_`s'

	replace pc_`s' = pc_`s'/`r(sd)'

	label var pc_`s' "PC1"

}

/*** Fraction explained by first four principal components:

0.3406

0.2347

0.1943

0.1003

***/


*** Same exercise for the "assessment" variables: first principal component per sample,
*** divided by its standard deviation.

* NOTE(review): money is presumably resolved by variable-name abbreviation (money_spent is
* created above) - confirm that no other variable name starts with money.
local X reliability informativeness money

foreach s of local S {

	pca `X' if sample_`s' == 1

	predict pr_`s' if sample_`s' == 1

	quietly summarize pr_`s'

	replace pr_`s' = pr_`s'/`r(sd)'

	label var pr_`s' "PC Reliability"

}


*** Long variable names cause trouble when regression output is stored, so short copies are created here

generate p_h = pessimistic_health
generate p_e = pessimistic_economy


**** Final labels for the main variables (LaTeX strings used in the output tables)

label var nowcast_deaths2 "Nowcast Deaths"
label var deaths_prior "Forecast Deaths"
label var p_deaths_prior "Pr(Deaths \(>\) 200k)"
label var nowcast_ue "Nowcast UR"
label var ue_prior "Forecast UR"
label var p_ue_prior "Pr(UR \(>\) 20\%)"
label var probability_infected "Pr(COVID inf.)"
label var p_inf_posterior "Pr(COVID inf.) (Post.)"
label var p_deaths_rev "\(\Delta\)Pr(Deaths \(>\) 200k)"
label var p_ue_rev "\(\Delta\)Pr(UR \(>\) 20\%)"
label var p_inf_rev "\(\Delta\)Pr. Infected"

* Indicators built from exact matches on the text answers to Q33 and Q30.
generate white = (Q33 == "White")
label var white "White"

generate unemployed = (Q30 == "Not working, but would like to work")
label var unemployed "Unemployed"

* Indicator for actions_taken equal to 3.
generate lockdown = (actions_taken == 3)

label var lockdown "Stay at Home Order"

* County unemployment rate divided by 10.
generate ur10 = ue_county / 10

label var ur10 "County UR (\(\div 10\))"

* Reverse the health scale around 3 (x becomes 6 - x).
replace health = -health + 6

**** Total time spent on the pages that follow the article

* Page-submit timers are added one at a time, in page order; a missing timer on any
* page makes the total missing for that respondent.
generate time_after = 0

foreach page_timer in ///
	time13_PageSubmit time14_PageSubmit time15_choice1_PageSubmit ///
	time16_PageSubmit time17_PageSubmit time18_PageSubmit ///
	time19_PageSubmit time20_PageSubmit time21_PageSubmit ///
	time22_choice2_PageSubmit time23_PageSubmit Q173_PageSubmit {

	replace time_after = time_after + `page_timer'

}



* Store the cleaned wave-1 dataset.
save data_wave1, replace

*** Remove the intermediate datasets written to the working directory above

foreach scratch in temp ue_county {

	erase `scratch'.dta

}
